/* 
This file prepares an agency x month dataset 

*/ 

* Start from an empty dataset in memory
clear
* Matrix size setting, value kept as in the original script
set matsize 100
* Do not pause the output with a more prompt
set more off 
* From here on every command, and every star comment, ends at a semicolon rather than at end of line
# delimit;

* Set working directory - the relative ../dta_secure paths used below are resolved from this folder; 

cd "~/Desktop/migrec_replication/do/";

*********************************************************************************
* Load firm x month x district panel
* this is a panel that includes any firm that sent non-zero migrants in 2005-2015;

	use ../dta_secure/2_firm_mn_dist_balanced.dta, clear; 

* merge in agency ratings data, which includes agency characteristics;
* note, there are no master only matches in this merge, so keeping matched rows only just drops
agency chars for agencies that are not in the panel ;

	merge m:1 agency_id using ../dta_secure/agency_ratings.dta, gen(merge_ratings) force; 	
	keep if merge_ratings == 3; 

	sort agency_id district year month;

	drop if year == 2016; //drops 2016 data since incomplete 

* convert the license and blacklist years to numeric. One pass is enough, a repeated destring on
already numeric variables does nothing;
	destring agency_license_yr agency_blacklist_yr, replace;

* license years before 1980 are treated as missing;
	replace agency_license_yr = . if agency_license_yr < 1980 ;	
	

	

*********************************************************************************
* Go from firm X month X district to firm X month;

* give the variables whose names end in 2012 or 2014, and the license number, an agency_ prefix
so that the agency_ wildcard used in the collapse below picks them up;
	rename *2012 agency_*2012;
	rename *2014 agency_*2014;
	rename license_no agency_license_no;
	drop *merge* _m_* *_50;

* agency_id is temporarily called id so that the agency_ wildcard does not match the collapse key;
	rename agency_id id;
* the name variables already carried the prefix and got it twice above, so strip the extra one;
	rename (agency_agency_name_2012 agency_agency_name_2014) (agency_name_2012 agency_name_2014);

* note, the variables being summed come from the firm x month x district file, while the ones taken
with (first) are unique only at the agency level;

* months-to-solve times number solved, so that the sum can later be divided by the summed number solved;
gen total_complaint_months = filed_cmpl_solv_months*filed_cmpl_solved;

* everything that is not an agency characteristic or a panel key is summed over districts;
	ds agency_* year month district id, not;
	local sum_vars `r(varlist)';
	collapse (sum) `sum_vars' (first) agency_*, by(id year month);
	rename id agency_id;
	
	

*********************************************************************************;	
* Merge in job order data at firm-month-yr level; 

* the merge key is license_no, so drop the agency_ prefix added for the collapse;
	rename agency_license_no license_no;
	merge 1:1 license_no year month using ../dta_secure/firm_joborder_month.dta, gen(_m_jos);
* keep master-only and matched rows, i.e. discard rows that exist only in the job order file;
	keep if inlist(_m_jos, 1, 3);

	sort agency_id year ;
* atag flags one row per agency;
	egen atag = tag(agency_id);
	

	
*********************************************************************************;	
* Clean data at firm-month level; 

* if agency does not have blacklist year, blank out an ever-blacklisted flag equal to 1;
* NOTE(review) - the original remark says to assume such an agency is not blacklisted, yet the flag
is set to missing rather than to 0. Confirm which of the two is intended;
	replace agency_ever_bl = . if agency_blacklist_yr == . & agency_ever_bl == 1; //2 obs

* licensing cohort dummies. A missing value compares as larger than every number in Stata, so the
test for 2005 or later needs an explicit non-missing guard. Without it agencies with no usable
license year would be flagged as new firms. Both dummies are 0 when the license year is missing;
	g new_firm = (agency_license_yr >= 2005 & agency_license_yr != .);
	la var new_firm "If firm licensed 2005 or after";
		
	g old_firm = (agency_license_yr <= 2004 & agency_license_yr!=.);
	la var old_firm "If firm licensed before 2005";

* sttime contains the age of the firm in years, where 0 is the year the firm was licensed. years
before the license date are negative, and sttime stays missing when the license year is missing;
			
	g sttime = 0 if new_firm == 1 & (year == agency_license_yr) & (month == agency_license_month);
	replace sttime = year-agency_license_yr if old_firm == 1;
	replace sttime = year-agency_license_yr if new_firm == 1 & sttime ==.;
	la var sttime "time	variable with first month of operation as 0";		

* firm age is only defined from the license year onwards;
	g firm_age_yr = sttime;
	replace firm_age_yr = . if sttime < 0;
	la var firm_age_yr "year-specific firm age";
	
	* blacklist dummy is 1 in years strictly after the blacklist year and 0 otherwise, which
	includes agencies with no blacklist year recorded;
	generate agency_blyr_dum = (year > agency_blacklist_yr);
	label variable agency_blyr_dum "year-specific blacklist dummy" ; 

	* complaint intensities are complaint counts per migrant under three dating conventions;
	generate comp_int = complaints / migrant;
	label variable comp_int "Complaint intensity by departure month of migrant";

	generate inci_comp_int = inci_complaints / migrant;
	label variable inci_comp_int "Complaint intensity by incident month of complaint";

	generate filed_comp_int = filed_complaints / migrant;
	label variable filed_comp_int "Complaint intensity by filing year of complaint";

* complaint resolution;

* average months to resolution is the solved-weighted total of months divided by the number solved;
	gen solv_months  = total_complaint_months/filed_cmpl_solved;
	label var solv_months "average time to solve a complaint";

* resolution rates are solved complaints over filed complaints and are missing when nothing was filed;
	g solved_rate = filed_cmpl_solved/filed_complaints; 
	la var solved_rate "Complaint resolved rate using year of complaint";
	
	g harass_solved_rate = filed_harass_cmpl_solved/filed_harass; 
	la var harass_solved_rate "Harassment complaint resolved rate using year of complaint";

* the breach rate uses breach plus nonpayment filings as its denominator;
	g breach_solved_rate = filed_breach_cmpl_solved/(filed_breach + filed_nonpay); 
	la var breach_solved_rate "Breach complaint resolved rate using year of complaint";

	g solved_rate_1yr = filed_cmpl_solved_within_1yr/filed_complaints; 
	la var solved_rate_1yr "Complaint resolved rate within 1 yr using year of complaint";

* label corrected to say 2 yr, it previously repeated the 1 yr text of the variable above;
	g solved_rate_2yr = filed_cmpl_solved_within_2yr/filed_complaints; 
	la var solved_rate_2yr "Complaint resolved rate within 2 yr using year of complaint";


* generate share of migrant in a firm by specific characteristic;
* each listed count variable is divided by the total number of migrants of the firm-month;

	local mig_groups men women low_skill medium_skill high_skill saudi qatar uae kuwait jordan bahrain lebanon malaysia;
	foreach grp of local mig_groups {;
		g mig_share_`grp' = `grp' / migrant;
		la var mig_share_`grp' "share of migrants in firm - `grp' ";
	};

				
*********************************************************************************;	
* treatment and rating variables;

* elig_migs is the agency total of migrants over its 2009 rows, spread to every row of the agency;
	bys agency_id: egen elig_migs_tmp = total(migrant) if year == 2009, missing;
	bys agency_id: egen elig_migs = max(elig_migs_tmp);
	la var elig_migs "Num migrants 2009";  
	drop elig_migs_tmp;

* 2012 sample - agencies with positive 2009 migrants, or with zero 2009 migrants and a license
year before 2011;
	g sample_2012 = (elig_migs > 0 & elig_migs != .) | (elig_migs == 0 & agency_license_yr < 2011);

* treatment dummy is defined inside the 2012 sample only and is missing outside of it;
	g elig_firm = (elig_migs >= 100) if sample_2012 == 1;
	la var elig_firm "Treatment agency, migs >= 100";

* treated agencies without a 2012 star value get the No rating category;
	replace agency_star_2012 = 0 if elig_firm == 1 & agency_star_2012==.;

	lab define starz 0 "No rating" 1 "1-star" 2 "2-star" 3 "3-star"  4 "4-star" 5 "5-star";
	lab values agency_star_2012 starz;

* post period starts in March 2012;
	g post = (year > 2012) | (year == 2012 & month > 2);
	la var post "Post - March 2012 and above";

	g elig_post = (elig_firm == 1 & post == 1);
	la var elig_post "Treat * Post- varies before and after 2012";
	
* create variable for firm exit; 
* an agency counts as exited in a year when its migrants summed over that year are zero;
	bysort agency_id year: egen migs_y = total(migrant);
	label variable migs_y "migrants sent at the agency-year level";

	generate exit_1yr = (year == 2013 & migs_y == 0); 
	label variable exit_1yr "firms the have zero migrants, 1 yr afer 2012"; 

	generate exit_2yr = (year == 2014 & migs_y == 0); 
	label variable exit_2yr "firms the have zero migrants, 2 yrs afer 2012"; 

*  vars defns for robustness; 
* winsor is a user-written command, the install attempt is captured so that a failure does not stop the run;
	cap ssc install winsor; 

* every winsorized variable below uses the same 0.025 cut-off;
	local tail 0.025;

	gen mean_salary = usd_salary / migrant; 
	winsor mean_salary, gen(mean_salary_win) p(`tail'); 
	label var mean_salary_win "mean usd salary with 0.025 winsor"; 

	gen mean_salary_d = usd_salary_d / migrant; 
	winsor mean_salary_d, gen(mean_salary_d_win) p(`tail'); 
	label var mean_salary_d_win "mean usd salary with 0.025 winsor, 2015 dollars"; 

	gen log_salary = ln(usd_salary);
	label var log_salary "log of usd salary"; 

	winsor usd_salary, gen(usd_salary_win) p(`tail');
	label var usd_salary_win "usd salary with 0.025 winsor"; 

	winsor usd_salary_d, gen(usd_salary_d_win) p(`tail');
	label var usd_salary_d_win "usd salary with 0.025 winsor, 2015 dollars"; 

	winsor migrant, gen(migrant_win) p(`tail'); 
	label var migrant_win "migrants with 0.025 winsor"; 

	winsor comp_int, gen(comp_win) p(`tail'); 
	label var comp_win "complaint intensity with 0.025 winsor"; 

	gen domestic_worker_share = domestic_worker / migrant;
	label var domestic_worker_share "share of recruits who are domestic workers";
	
/* we recode observations as missing if obs are coded as 0 before the license year, i.e. 
you can't send zero migrants if you don't exist. Recode
observations as 0 if missing after license year, i.e. you're active but you didn't send anyone */
	
* agencies that send migrants before their license year, i.e. positive 2009 migrants together with
a license year after 2010. A missing license year compares as larger than 2010 in Stata, so it is
excluded explicitly. Two further agency ids are tagged by hand;
	g mtag = 1 if (elig_migs > 0 & elig_migs!=.) & agency_license_yr>2010 & agency_license_yr!=.;
	replace mtag = 1 if agency_id == 30909 |  agency_id == 31071; 

	drop atag;

* every variable that is not on the exclusion list below goes through the recode loop;
* NOTE(review) - license_no and _m_jos are not on the exclusion list, so they appear to be recoded
as well. Confirm that this is intended;
* NOTE(review) - a missing sttime satisfies sttime >= 0 in Stata, so rows without a license year
also get their missing values set to 0. Confirm that this is intended;
	ds agency_* year month sttime mtag vil_* elig_migs sample_2012 elig_firm,not;
	
	foreach x in `r(varlist)' {;
		replace `x' = . if sttime < 0 & mtag!=1; //an agency can't send a migrant if it doesn't exist
		replace `x' = 0 if `x' ==. & sttime >= 0;

	};

	rename mtag agency_mistake_license;
	la var  agency_mistake_license "Agency shown to send migrants before licensing";
		
* no or too few job order data in 2005 and 2014; 

	ds num_job_orders *_vac_* fill_rate ticket_share domestic_share med_share maxsalary; 
	local jo_vars `r(varlist)';

* there is no job order data in 2005 and 2014 so make those missing;

	foreach v of local jo_vars {;
		replace `v' = . if inlist(year, 2005, 2014);
	};

* winsorize after the blanking above so that the 2005 and 2014 rows do not enter the cut-offs;
	winsor num_job_orders, gen(job_order_win) p(0.025);
	label var job_order_win "job orders with 0.025 winsor"; 

*********************************************************************************;	
* save data - writes the agency x month file and overwrites any existing copy;

	save "../dta_secure/3_firm_mn.dta", replace;
	
	
